Causal Inference Practical Methods

R
DiD
RD
IV
Causal Inference
with R
Author
Affiliation

Xinzhuo Huang

HKUST SOSC

Published

May 15, 2023

Modified

November 21, 2023

Setup

Code
require("pacman")

p_load(
    tibble, dplyr, furrr, purrr, tidyr, stringr, rio, ggplot2, knitr, broom, kableExtra, lfe, plm, ggthemes, skimr, ggpmisc, AER, dagitty, ggdag, rdrobust, rddensity, did
)

print_kable <- \(result, title) {
    kable(
        result,
        digits = 3,
        caption = title,
        booktabs = TRUE
    )
}

Fixed Effects

Linear Dummy Variabel Model

Fixed effects model shows that the beer tax does reduce fatalities.

Code
fatality <- import(
    "E:/OneDrive - HKUST Connect/assignment/assignment3/fatality.csv",
    setclass = "tibble"
)

fatality %>%
    ggplot(aes(x = beertax, y = fatality_rate)) +
    geom_point(aes(color = state), size = 2.3) +
    geom_smooth(
        aes(group = state, color = state),
        method = "lm",
        se = FALSE
    ) +
    geom_smooth(method = "lm", linetype = "dashed", color = "#8E8E93") +
    xlab("Beer Tax") +
    ylab("Fatalities") +
    ggthemes::theme_hc() +
    theme(panel.grid.minor = element_blank(), legend.position = "none")

The relationship between the beer tax and fatality
Code
linear_dummy_formula <- fatality %>%
    pull(state) %>%
    sprintf("`%s`", .) %>%
    unique() %>%
    str_c(collapse = " + ") %>%
    str_c("fatality_rate ~ 0 + beertax + ", ., collapse = " + ") %>%
    as.formula()

linear_dummy <- fatality %>%
    pivot_wider(
        names_from = state,
        values_from = state
    ) %>%
    mutate(
        across(3:50, ~ str_replace(., "\\w+", "1")),
        across(3:50, ~ replace_na(., "0")),
        across(3:50, as.numeric)
    ) %>%
    lm(
        linear_dummy_formula,
        data = .
    )

linear_dummy %>%
    tidy() %>%
    slice(1) %>%
    print_kable(title = "Linear dummy variable model")
Linear dummy variable model
term estimate std.error statistic p.value
beertax -0.656 0.188 -3.491 0.001

Fixed effect in standard packages

Code
fixed_effect_plm <- fatality %>%
    pdata.frame(index = c("state")) %>%
    plm(
        fatality_rate ~ beertax,
        data = .,
        model = "within",
        effect = "individual"
    )

fixed_clustered_plm <- fixed_effect_plm %>%
    tidy() %>%
    mutate(
        category = "fixed effects with plm",
        `std.error` = vcovHC(
            fixed_effect_plm,
            type = "HC1",
            cluster = "group"
        ) |>
            diag() |>
            sqrt(),
        statistic = coef(fixed_effect_plm) / `std.error`,
        `p.value` = 2 * pt(
            abs(statistic),
            df = fixed_effect_plm$df.residual,
            lower.tail = FALSE
        )
    )

fixed_clustered_lfe <- fatality %>%
    felm(
        fatality_rate ~ beertax | state,
        cluster = "state",
        data = .
    ) %>%
    tidy() %>%
    mutate(category = "fixed effects with lfe")

fixed_clustered_lfe %>%
    bind_rows(fixed_clustered_plm) %>%
    print_kable(title = "Fixed effect in standard packages")
Fixed effect in standard packages
term estimate std.error statistic p.value category
beertax -0.656 0.292 -2.247 0.029 fixed effects with lfe
beertax -0.656 0.289 -2.271 0.024 fixed effects with plm

DiD

DiD as a linear regression with interaction

Code
did_interaction <- cartheft %>%
    lm(
        cartheft ~ treatment + postattack + treatment * postattack,
        data = .
    ) %>%
    tidy() %>%
    slice(4) %>%
    mutate(term = "Same-Block Police")

did_interaction %>%
    print_kable(., title = "linear regression with interaction")
linear regression with interaction
term estimate std.error statistic p.value
Same-Block Police -0.078 0.027 -2.847 0.004

DiD asd fixed effect

Code
did_fixed <- cartheft %>%
    felm(
        cartheft ~ treatment * postattack | blockid + month,
        data = .
    ) %>%
    tidy() %>%
    slice(3) %>%
    mutate(term = "Same-Block * Postattack")

did_fixed %>%
    print_kable(title = "Two way fixed effect model")
Two way fixed effect model
term estimate std.error statistic p.value
Same-Block * Postattack -0.078 0.026 -2.992 0.003

Time-invariant variables in fixed effect regressions

We cannot estimate the coefficient for time-invariant variables since they are absorbed by the individual fixed effects.

Code
did_fixed_invariant <- cartheft %>%
    felm(
        cartheft ~ treatment * postattack + bank | blockid + month,
        data = .
    ) %>%
    tidy() %>%
    slice(3:4) %>%
    mutate(term = c("Bank", "Same-Block * Postattack"))

did_fixed_invariant %>%
    print_kable(title = "Time-invariant variables in regression")
Time-invariant variables in regression
term estimate std.error statistic p.value
Bank NaN NA NaN NaN
Same-Block * Postattack -0.078 0.026 -2.992 0.003

Instrumental Variable

\[\begin{align*} \log(wage) = \beta_{0} + \beta_{1}educ \end{align*}\] \[\begin{align*} educ = \gamma_{0} + \gamma_{1}educ \end{align*}\]

IV estimate

The IV estimator is larger than the OLS estimator. Since in this study, IV is used to address endogeneity by providing a source of variation in the predictor variable that is independent of the error term. In that case, IV estimate accounts for endogeneity and provides a more reliable estimate of the true causal effect of the predictor variable on the outcome variable. The OLS estimate, on the other hand, may be biased and inconsistent in the presence of endogeneity, leading to an underestimate of the true effect size.

Code
dag <- tribble(
    ~name, ~label, ~x, ~y,
    "educ", "Educ", 4, 1,
    "wage", "Wage", 5, 1,
    "e", "Confounders", 4.5, 1.5,
    "Z", "Instrumental Variable", 3, 1
)

node_labels <- dag$label
names(node_labels) <- dag$name

status_colors <- c(
    exposure = "#00BFFF",
    outcome = "#FF7F50",
    latent = "#D3D3D3"
)

dagify(
    wage ~ educ + e,
    educ ~ e,
    educ ~ Z,
    exposure = "educ",
    outcome = "wage",
    latent = "e",
    coords = dag,
    labels = node_labels
) %>%
    tidy_dagitty() %>%
    node_status() %>%
    ggplot(aes(x = x, y = y, xend = xend, yend = yend)) +
    geom_dag_edges() +
    geom_dag_point(aes(color = status)) +
    geom_dag_label_repel(
        aes(label = label, fill = status),
        color = "white",
        fontface = "bold"
    ) +
    scale_color_manual(values = status_colors, na.value = "grey20") +
    scale_fill_manual(values = status_colors, na.value = "grey20") +
    guides(color = FALSE, fill = FALSE) +
    theme_dag()

Directed Acyclic Graphs
Code
card <- import(
    "E:/OneDrive - HKUST Connect/assignment/assignment3/card.csv",
    setclass = "tibble"
)

ols <- card %>%
    lm(
        lwage ~ educ,
        data = .
    ) %>%
    coeftest(vcovHC, "HC1") %>%
    tidy() %>%
    slice(2) %>%
    mutate(method = "OLS with robust std.error")

iv <- card %>%
    ivreg(
        lwage ~ educ | nearc4,
        data = .
    ) %>%
    coeftest(vcovHC, "HC1") %>%
    tidy() %>%
    slice(2) %>%
    mutate(method = "IV with robust std.error")

bind_rows(ols, iv) %>%
    print_kable(title = "Comparison between OLS and IV")
Comparison between OLS and IV
term estimate std.error statistic p.value method
educ 0.052 0.003 17.902 0 OLS with robust std.error
educ 0.188 0.026 7.189 0 IV with robust std.error

Manual 2SLS

Code
first_stage <- card %>%
    lm(
        educ ~ nearc4,
        data = .
    )

manual <- card %>%
    mutate(educ = predict(first_stage)) %>%
    lm(
        lwage ~ educ,
        data = .
    ) %>%
    coeftest(vcovHC, "HC1") %>%
    tidy() %>%
    slice(2) %>%
    mutate(method = "Manual 2SLS with robust std.error")

bind_rows(manual, iv, ols) %>%
    print_kable(title = "Comparison between OLS, IV and manual 2SLS")
Comparison between OLS, IV and manual 2SLS
term estimate std.error statistic p.value method
educ 0.188 0.021 9.149 0 Manual 2SLS with robust std.error
educ 0.188 0.026 7.189 0 IV with robust std.error
educ 0.052 0.003 17.902 0 OLS with robust std.error

Weak instrument

The F statistics from the first-stage regression is larger than 10.

Code
waldtest(
    lm(lwage ~ educ + nearc4, card),
    lm(lwage ~ educ, card)
) %>%
    tidy() %>%
    rename(F.statistic = statistic) %>%
    print_kable(title = "Weak instrument test")
Weak instrument test
term df.residual df F.statistic p.value
lwage ~ educ + nearc4 3007 NA NA NA
lwage ~ educ 3008 -1 48.451 0

Interpretation of effect

  • Compliers are the individuals whose decision to attend college is influenced by the instrumental variable. In this study, compliers are the students who would attend college if they lived close to one and would not attend college if they lived far away. These students’ decisions are directly impacted by the geographical distance to college.
  • Always-takers are the individuals who would attend college regardless of the geographical distance. These students would pursue higher education even if they lived far away from a college.

Sharp RD

RD plot

We can observe a clear difference around the cutoff.

Code
senate <- import(
    "E:/OneDrive - HKUST Connect/assignment/assignment3/senate.csv",
    setclass = "tibble"
) %>%
    mutate(cutoff = if_else(margin > 0, 1, 0))

senate %>% 
    mutate(across(everything(), ~ round(., 2))) %>% 
    DT::datatable()

Raw RD plot

Code
senate %>%
    ggplot(aes(x = margin, y = vote, color = as.factor(cutoff))) +
    geom_point(
        size = 1,
        alpha = 0.5,
        position = position_jitter(
            width = 0,
            height = 0.25
        )
    ) +
    geom_smooth(
        data = filter(senate, margin > 0),
        aes(x = margin, y = vote),
        method = "lm",
        color = "#8E8E93"
    ) +
    geom_smooth(
        data = filter(senate, margin < 0),
        aes(x = margin, y = vote),
        method = "lm",
        color = "#8E8E93"
    ) +
    scale_color_manual(values = c("#FF7F50", "#00BFFF")) +
    geom_vline(
        xintercept = 0,
        linetype = "dotdash",
        linewidth = 1
    ) +
    ggthemes::theme_hc() +
    theme(
        legend.position = "none"
    ) +
    xlab("Winning margin of a Democratic party senator") +
    ylab("Share of vote of the senator in election")

Raw RD plot
Code
rdplot(
    y = senate$vote,
    x = senate$margin,
    nbins = 10,
    x.label = "Winning margin of a Democratic party senator",
    y.label = "Share of vote of the senator in election",
    title = ""
)

Binned RD plot

Density Test

Code
density_plot <- rddensity(
    senate$margin,
    c = 0
) %>%
    rdplotdensity(
        rdd = .,
        X = senate$margin,
        type = "both"
    ) %>%
    pluck("Estplot")

Density Test

RD bandwidth selection

The obtained bandwidth is 24.969. We estimate the RD effect using three different bandwidth values (ideal, twice the ideal, and half of the ideal) to determine if the value is appropriate. As we can see, the coefficients change substantially. It is considered a big bandwidth since it includes more observations in our analysis.

Code
rdbwselect(
    y = senate$vote,
    x = senate$margin,
    c = 0,
    p = 3,
    kernel = "triangular",
    bwselect = "mserd"
) %>%
    pluck("bws") %>%
    as_tibble() %>%
    set_names(
        c("BW est left", "BW est right", "BW bias left", "BW bias right")
    ) %>%
    print_kable(title = "RD bandwidth selection")
RD bandwidth selection
BW est left BW est right BW bias left BW bias right
24.969 24.969 37.221 37.221
Code
rd_models <- c(24.969, 24.969 * 2, 24.969 / 2) %>%
    map(~ rdrobust(
        y = senate$vote,
        x = senate$margin,
        p = 3,
        h = .
    ))

extract_se_es <- \(x) {
    tibble(
        "Estimate" = pluck(x, "Estimate")[1],
        "Se" = pluck(x, "se")[1],
        "P-Value" = pluck(x, "pv")[1]
    )
}

rd_models %>%
    map_dfr(extract_se_es) %>%
    mutate(
        Bandwidth = c(
            "24.969 (ideal)",
            "49.938 (twice)",
            "12.485 (half)"
        ),
        .before = 1
    ) %>%
    print_kable(title = "Sensitivity analysis using different bandwidths")
Sensitivity analysis using different bandwidths
Bandwidth Estimate Se P-Value
24.969 (ideal) 9.126 2.314 0
49.938 (twice) 7.676 1.781 0
12.485 (half) 13.872 3.126 0

Estimate Sharp RD treatment effect

The model result shows that there is an incumbency advantage.

Code
sharp_rd <- rdrobust(
    y = senate$vote,
    x = senate$margin,
    h = 24.96899,
    p = 3
)

tibble(
    "Estimate" = pluck(sharp_rd, "Estimate")[1],
    "Se" = pluck(sharp_rd, "se")[1],
    "P-Value" = pluck(sharp_rd, "pv")[1]
) %>%
    print_kable(title = "Sharp RD treatment effect")
Sharp RD treatment effect
Estimate Se P-Value
9.126 2.314 0

Varying orders of polynomial regression

Models with different bandwidth all support the same conclusion.

Code
c(1:4) %>%
    map(~ rdrobust(
        y = senate$vote,
        x = senate$margin,
        p = .,
        h = 24.96899
    )) %>%
    map_dfr(extract_se_es) %>%
    mutate(
        `Local-polynomial` = 1:4,
        .before = 1
    ) %>%
    print_kable("Varying orders of polynomial regression")
Varying orders of polynomial regression
Local-polynomial Estimate Se P-Value
1 7.103 1.264 0
2 7.854 1.797 0
3 9.126 2.314 0
4 11.556 2.882 0